# Library for handling data
import pandas as pd
# Library for UMAP projections
from umap.umap_ import UMAP
# Library for visualization
import plotly.express as px
# Path of the CSV that holds the results table used throughout this notebook.
RESULTS_CSV = 'hope_miRactivity_results.csv'

# Read the whole table into a DataFrame; every later cell works from `df`.
df = pd.read_csv(RESULTS_CSV)

# Bare expression: in a notebook this renders the frame as the cell output.
df
|  | Kids_First_Biospecimen_ID | sample_id | aliquot_id | RNA_library | short_histology | molecular_subtype | age_at_diagnosis_days | reported_gender | age_group | MIMAT0010195 | ... | MIMAT0004986 | MIMAT0004987 | MIMAT0000094 | MIMAT0026473 | MIMAT0004510 | MIMAT0000095 | MIMAT0022842 | MIMAT0000097 | MIMAT0004678 | MIMAT0000689 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | BS_VF5XWFXD | 7316-2594 | 574553 | stranded | HGAT | DMG, H3 K28, TP53 loss | 8 | Male | 5-10 | 0.046957 | ... | -0.055680 | 0.019986 | 0.303817 | -0.039460 | -0.012004 | 0.149087 | 0.032207 | 0.293745 | -0.008587 | 0.184319 |
| 1 | BS_W60RQAKK | 7316-371 | 734516 | stranded | HGAT | HGG, To be classified | 5 | Female | 0-5 | 0.184673 | ... | 0.117588 | 0.138313 | 0.404067 | 0.093777 | 0.093444 | 0.253196 | 0.126721 | 0.403668 | 0.119945 | 0.299600 |
| 2 | BS_TK84K675 | 7316-2751 | 711698 | stranded | HGAT | HGG, H3 wildtype | 3 | Female | 0-5 | 0.105755 | ... | 0.028216 | 0.057141 | 0.239396 | 0.027004 | 0.072512 | 0.151019 | 0.115442 | 0.283237 | 0.088276 | 0.205366 |
| 3 | BS_HWGWYCY7 | 7316-1455 | 400856 | poly-A | HGAT | HGG, To be classified | 2 | Female | 0-5 | -0.104705 | ... | 0.054945 | -0.063921 | -0.013288 | -0.005415 | 0.080462 | 0.034499 | -0.035228 | 0.001027 | 0.142655 | 0.086316 |
| 4 | BS_RX1YTZ7F | 7316-388 | 398449 | poly-A | HGAT | DMG, H3 K28, TP53 activated | 5 | Female | 0-5 | 0.113665 | ... | 0.032385 | 0.063875 | 0.341264 | 0.028191 | 0.050142 | 0.189022 | 0.087262 | 0.338444 | 0.034852 | 0.213586 |
| 5 | BS_XZM79E42 | 7316-409 | 398448 | poly-A | HGAT | DMG, H3 K28, TP53 activated | 12 | Female | 10-15 | 0.097259 | ... | -0.004175 | 0.044509 | 0.271858 | 0.006494 | 0.033367 | 0.142336 | 0.086633 | 0.236862 | 0.067023 | 0.134444 |
| 6 | BS_XQ5SFW35 | 7316-89 | 577715 | stranded | HGAT | HGG, H3 wildtype | 4 | Male | 0-5 | 0.134111 | ... | 0.040414 | 0.103520 | 0.369665 | 0.038248 | 0.061328 | 0.207337 | 0.117338 | 0.378433 | 0.049278 | 0.259048 |
| 7 | BS_MX23ZY0Y | 7316-195 | 739938 | stranded | HGAT | DMG, H3 K28, TP53 loss | 9 | Male | 5-10 | -0.039391 | ... | -0.081045 | -0.076824 | 0.215427 | -0.108573 | -0.122767 | 0.074374 | -0.091994 | 0.190338 | -0.066512 | 0.079004 |
| 8 | BS_4PPHAQXF | 7316-870 | 398453 | poly-A | HGAT | HGG, H3 wildtype | 10 | Female | 5-10 | 0.080769 | ... | -0.010550 | -0.003904 | 0.278134 | -0.054702 | -0.024307 | 0.113260 | 0.039207 | 0.280828 | 0.012331 | 0.172361 |
| 9 | BS_0RQ4P069 | 7316-1746 | 739942 | stranded | HGAT | HGG, H3 wildtype, TP53 loss | 6 | Female | 5-10 | -0.151490 | ... | -0.085015 | -0.252793 | -0.226109 | -0.139109 | -0.086405 | -0.268872 | -0.104461 | -0.237889 | 0.019412 | -0.246581 |
| 10 | BS_H0QWRJE2 | 7316-2146 | 564445 | stranded | HGAT | HGG, H3 wildtype | 3 | Female | 0-5 | 0.028185 | ... | -0.075678 | -0.071681 | 0.179788 | -0.075186 | -0.003470 | 0.073911 | 0.026267 | 0.198661 | 0.041545 | 0.144978 |
| 11 | BS_49BQS7Z6 | 7316-2151 | 711713 | stranded | HGAT | HGG, H3 wildtype | 6 | Male | 5-10 | 0.098388 | ... | -0.004243 | 0.026573 | 0.298890 | -0.008147 | 0.036768 | 0.111029 | 0.089668 | 0.309467 | 0.047102 | 0.227748 |
| 12 | BS_A0DYVX9J | 7316-2140 | 470436 | stranded | HGAT | HGG, H3 wildtype, TP53 loss | 20 | Male | >15 | 0.077432 | ... | -0.000316 | 0.011083 | 0.312797 | -0.039469 | -0.013298 | 0.188620 | 0.030691 | 0.288824 | 0.041812 | 0.195492 |
| 13 | BS_0VXZCRJS | 7316-466 | 401561 | poly-A | HGAT | DMG, H3 K28 | 7 | Male | 5-10 | 0.023542 | ... | -0.109889 | -0.044252 | 0.293370 | -0.123075 | -0.109265 | 0.100999 | -0.034502 | 0.277360 | -0.080691 | 0.115090 |
| 14 | BS_1A6MQ9ZA | 7316-1769 | 470030 | stranded | HGAT | DMG, H3 K28 | 15 | Male | 10-15 | 0.069749 | ... | -0.037707 | 0.033932 | 0.333838 | -0.039091 | 0.003598 | 0.195120 | 0.032581 | 0.319072 | 0.022263 | 0.208367 |
| 15 | BS_AWH9757B | 7316-3769 | 728271 | stranded | HGAT | HGG, H3 wildtype | 9 | Female | 5-10 | 0.216734 | ... | 0.121410 | 0.162101 | 0.442702 | 0.098945 | 0.132501 | 0.294107 | 0.190197 | 0.447034 | 0.111729 | 0.334226 |
| 16 | BS_V3Z3DB4N | 7316-3765 | 728273 | stranded | HGAT | HGG, H3 wildtype, TP53 loss | 7 | Female | 5-10 | 0.162427 | ... | 0.060295 | 0.104771 | 0.402539 | 0.041316 | 0.071960 | 0.239335 | 0.129007 | 0.407877 | 0.045677 | 0.300105 |
| 17 | BS_5GNQC2FF | 7316-2176 | 739936 | stranded | HGAT | HGG, H3 wildtype | 17 | Female | >15 | -0.168451 | ... | -0.103250 | -0.279049 | -0.235817 | -0.160501 | -0.100873 | -0.278256 | -0.119185 | -0.245856 | 0.005448 | -0.248443 |
| 18 | BS_6WP1FHTE | 7316-1763 | 549574 | stranded | HGAT | DMG, H3 K28, TP53 loss | 10 | Female | 5-10 | 0.121233 | ... | 0.027616 | 0.098796 | 0.376402 | 0.039615 | 0.076167 | 0.225092 | 0.133182 | 0.373657 | 0.036720 | 0.262401 |
| 19 | BS_ZD5HN296 | 7316-445 | 401741 | poly-A | HGAT | HGG, H3 wildtype | 6 | Male | 5-10 | 0.154198 | ... | 0.032801 | 0.080855 | 0.332752 | 0.043216 | 0.080150 | 0.185049 | 0.143249 | 0.357416 | 0.083080 | 0.262384 |
| 20 | BS_JS95PE0J | 7316-1774 | 574549 | stranded | HGAT | HGG, H3 wildtype, TP53 loss | 7 | Male | 5-10 | 0.156722 | ... | 0.081838 | 0.100529 | 0.339750 | 0.073451 | 0.112922 | 0.221652 | 0.148865 | 0.334720 | 0.125234 | 0.256886 |
| 21 | BS_TJTEF70D | 7316-895 | 470020 | stranded | HGAT | HGG, H3 wildtype | 3 | Female | 0-5 | 0.159911 | ... | 0.037145 | 0.068441 | 0.327287 | 0.038058 | 0.102268 | 0.223956 | 0.141065 | 0.341046 | 0.129624 | 0.274696 |
| 22 | BS_M8EA6R2A | 7316-913 | 739931 | stranded | HGAT | HGG, H3 wildtype | 3 | Female | 0-5 | -0.111993 | ... | -0.106253 | -0.157951 | 0.123723 | -0.148130 | -0.149451 | 0.011854 | -0.215547 | 0.103965 | -0.091319 | 0.007893 |
| 23 | BS_23QW0BBA | 7316-114 | 577714 | stranded | HGAT | HGG, H3 wildtype | 10 | Female | 5-10 | -0.016528 | ... | -0.046973 | -0.004073 | 0.280399 | -0.060845 | -0.058444 | 0.134143 | -0.037533 | 0.273988 | -0.082397 | 0.112136 |
| 24 | BS_WKESC3XN | 7316-3303 | 734524 | stranded | HGAT | HGG, H3 wildtype, TP53 loss | 18 | Male | >15 | 0.123707 | ... | 0.046442 | 0.081986 | 0.332561 | 0.022124 | 0.039491 | 0.157843 | 0.094590 | 0.337584 | 0.064393 | 0.235651 |
| 25 | BS_4B0BAVTX | 7316-2152 | 574550 | stranded | HGAT | DMG, H3 K28, TP53 loss | 12 | Male | 10-15 | 0.083742 | ... | 0.000488 | 0.033752 | 0.272690 | -0.028016 | 0.009667 | 0.137883 | 0.070368 | 0.275965 | 0.049212 | 0.189988 |
26 rows × 2059 columns
# Tally how many rows fall under each molecular_subtype label and print the
# resulting counts (value_counts orders them from most to least frequent).
subtype_counts = df['molecular_subtype'].value_counts()
print(subtype_counts)
HGG, H3 wildtype               11
HGG, H3 wildtype, TP53 loss     5
DMG, H3 K28, TP53 loss          4
HGG, To be classified           2
DMG, H3 K28, TP53 activated     2
DMG, H3 K28                     2
Name: molecular_subtype, dtype: int64
# Project every row of the table into two dimensions with UMAP.
# random_state=0 fixes the seed so repeated runs give the same layout.
umap_2d = UMAP(n_components=2, init='random', random_state=0)

# Columns 0-8 are the metadata fields shown in the table above; everything
# from column 9 onward is passed to UMAP as the feature matrix.
feature_matrix = df.iloc[:, 9:]
proj_2d = umap_2d.fit_transform(feature_matrix)

# One scatter plot per annotation: the same 2-D embedding each time, with
# the points coloured by a different metadata column.
fig1 = px.scatter(
    proj_2d,
    x=0,
    y=1,
    color=df['molecular_subtype'].tolist(),
    title='Hope Clustering based on Molecular Subtype (ActMiR)',
)
fig2 = px.scatter(
    proj_2d,
    x=0,
    y=1,
    color=df['reported_gender'].tolist(),
    title='Hope Clustering based on Patient Gender (ActMiR)',
)
fig3 = px.scatter(
    proj_2d,
    x=0,
    y=1,
    color=df['age_group'].tolist(),
    title='Hope Clustering based on Patient Age (ActMiR)',
)

# Label the axes identically on all three figures before rendering any.
for fig in (fig1, fig2, fig3):
    fig.update_xaxes(title_text='UMAP1')
    fig.update_yaxes(title_text='UMAP2')

# Render the figures in order: subtype, gender, age group.
for fig in (fig1, fig2, fig3):
    fig.show()